In [3]:
import pandas as pd
import os
import numpy as np
#import pandas_profiling
In [5]:
# Directory and file name of the demographics CSV, relative to the
# current working directory.
path = 'DataSet/Original'
file = 'demographics.csv'

# Build the path with os.path.join so the separator is not hard-coded.
# sep=',' and doublequote=True are the read_csv defaults, so they are omitted.
demographics = pd.read_csv(os.path.join(path, file))

# (rows, columns) of the loaded frame.
demographics.shape
Out[5]:
(188, 33)
In [4]:
# Preview the first five rows of the frame.
demographics.head(n=5)
Out[4]:
nta_name borough nta_code population under_5_years 5-9_years 10-14_years 15-19_years 20-24_years 25-29_years ... 15000_to_24999 25000_to_34999 35000_to_49999 50000_to_74999 75000_to_99999 100000_to_149999 150000_to_199999 200000_or_more median_income mean_income
0 Allerton-Pelham Gardens Bronx BX31 28903 1679 1706 1763 2039 1964 1703 ... 797 773 1160 1764 1155 1562 765 427 61638 78489
1 Annadale-Huguenot-Prince's Bay-Eltingville Staten Island SI01 27770 1397 1698 1817 1880 1720 1594 ... 571 405 1008 1523 1346 2075 1086 1151 88288 109187
2 Arden Heights Staten Island SI48 25238 1507 1540 1596 1752 1614 1561 ... 337 516 707 1421 1611 2021 1047 740 89570 101627
3 Astoria Queens QN70 78793 3480 3037 3060 3392 6630 11586 ... 3673 2816 4725 6463 4557 4698 1627 1197 54882 70094
4 Auburndale Queens QN48 19996 917 966 1063 1168 1214 1307 ... 445 632 690 1417 1060 1237 589 433 70772 84402

5 rows × 33 columns

In [10]:
# Number of rows per borough, most frequent first.
demographics['borough'].value_counts()
Out[10]:
Queens           56
Brooklyn         50
Bronx            36
Manhattan        28
Staten Island    18
Name: borough, dtype: int64
In [13]:
# Store borough as a categorical column; categories are inferred from the
# distinct values present in the column.
demographics['borough'] = pd.Categorical(demographics['borough'])
In [14]:
# Show the dtype of every column.
demographics.dtypes
Out[14]:
nta_name              object
borough             category
nta_code              object
population             int64
under_5_years          int64
5-9_years              int64
10-14_years            int64
15-19_years            int64
20-24_years            int64
25-29_years            int64
30-34_years            int64
35-39_years            int64
40-44_years            int64
45-49_years            int64
50-54_years            int64
55-59_years            int64
60-64_years            int64
over_65_years          int64
median_age             int64
people_per_acre      float64
households             int64
less_than_10,000       int64
10000_to_14999         int64
15000_to_24999         int64
25000_to_34999         int64
35000_to_49999         int64
50000_to_74999         int64
75000_to_99999         int64
100000_to_149999       int64
150000_to_199999       int64
200000_or_more         int64
median_income          int64
mean_income            int64
dtype: object
In [15]:
# NOTE(review): profile_report() is not a core pandas DataFrame method; it is
# presumably the one registered by importing pandas_profiling, whose import is
# commented out in the first cell. Confirm that import is restored, otherwise
# this cell is expected to fail on a fresh kernel (Restart & Run All).
demographics.profile_report()
Out[15]:

In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
demographics.describe()
Out[17]:
population under_5_years 5-9_years 10-14_years 15-19_years 20-24_years 25-29_years 30-34_years 35-39_years 40-44_years ... 15000_to_24999 25000_to_34999 35000_to_49999 50000_to_74999 75000_to_99999 100000_to_149999 150000_to_199999 200000_or_more median_income mean_income
count 188.000000 188.00000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 ... 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000 188.000000
mean 43397.175532 2752.12766 2515.478723 2489.159574 2842.632979 3404.617021 3872.106383 3512.468085 3116.265957 3007.898936 ... 1750.861702 1487.734043 1909.824468 2544.452128 1803.074468 2078.186170 939.824468 1224.664894 57002.489362 77808.877660
std 21288.062949 1695.80947 1496.808314 1459.440306 1574.371410 1886.482082 2462.683806 2113.358863 1704.685137 1531.520696 ... 1065.437156 893.651778 1076.642531 1415.221055 1041.592900 1430.129096 890.562915 2315.006940 24341.286390 38791.512605
min 13354.000000 506.00000 408.000000 326.000000 449.000000 798.000000 736.000000 742.000000 689.000000 743.000000 ... 191.000000 371.000000 496.000000 782.000000 241.000000 201.000000 20.000000 8.000000 20334.000000 29720.000000
25% 27237.000000 1490.50000 1443.500000 1437.750000 1619.000000 1929.500000 2089.250000 1928.000000 1835.750000 1867.500000 ... 930.000000 827.750000 1139.500000 1504.750000 1111.000000 1220.250000 426.500000 248.750000 39015.000000 54656.750000
50% 37897.000000 2386.00000 2228.500000 2197.500000 2568.000000 2956.000000 3081.500000 2893.000000 2555.500000 2520.000000 ... 1504.500000 1209.000000 1629.000000 2134.000000 1580.000000 1819.500000 760.000000 517.500000 53114.500000 70396.000000
75% 54244.750000 3579.25000 3242.750000 3263.750000 3717.250000 4527.000000 5252.750000 4611.000000 4034.500000 3778.500000 ... 2235.250000 2066.500000 2494.250000 3117.500000 2328.500000 2462.500000 1068.750000 963.000000 71430.750000 89989.500000
max 132378.000000 14703.00000 11971.000000 10024.000000 9094.000000 10046.000000 11971.000000 11292.000000 10406.000000 10004.000000 ... 5231.000000 4549.000000 5901.000000 7655.000000 6109.000000 9962.000000 6359.000000 15031.000000 155213.000000 311109.000000

8 rows × 30 columns

In [24]:
# Count missing values in each column.
demographics.isna().sum()
Out[24]:
nta_name            0
borough             0
nta_code            0
population          0
under_5_years       0
5-9_years           0
10-14_years         0
15-19_years         0
20-24_years         0
25-29_years         0
30-34_years         0
35-39_years         0
40-44_years         0
45-49_years         0
50-54_years         0
55-59_years         0
60-64_years         0
over_65_years       0
median_age          0
people_per_acre     0
households          0
less_than_10,000    0
10000_to_14999      0
15000_to_24999      0
25000_to_34999      0
35000_to_49999      0
50000_to_74999      0
75000_to_99999      0
100000_to_149999    0
150000_to_199999    0
200000_or_more      0
median_income       0
mean_income         0
dtype: int64
In [ ]: